library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(countrycode)
# Load the raw survey export; readr guesses the column types (reported below).
df <- read_csv(file = "survey_results_public.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Respondent = col_double(),
##   Age = col_double(),
##   CompTotal = col_double(),
##   ConvertedComp = col_double(),
##   WorkWeekHrs = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
# Keep only the survey columns used in this analysis.
DFF <- select(
  df,
  MainBranch, Country, Gender, Age, YearsCode, YearsCodePro, EdLevel,
  Employment, JobSat, OrgSize, WorkWeekHrs, NEWOvertime, NEWOnboardGood,
  JobSeek, NEWLearn, LanguageWorkedWith, PlatformWorkedWith
)

# Region of each respondent's country. The lookup is done once and reused by
# every subset below. countrycode() warns about names it cannot match (here
# "Nomadic"); the masks below never select rows with a missing region.
region <- as.character(countrycode(
  sourcevar = DFF$Country,
  origin = "country.name",
  destination = "region"
))
## Warning in countrycode(sourcevar = DFF$Country, origin = "country.name", : Some values were not matched unambiguously: Nomadic

# Row masks. `%in%` yields FALSE (not NA) for missing values, so a row with a
# missing MainBranch, Country or region is never selected.
is_professional <- DFF$MainBranch %in% "I am a developer by profession"
is_usa <- DFF$Country %in% "United States"
is_latam <- region %in% "Latin America & Caribbean"

# Respondent-profile columns: everything except the two filter columns.
# Selecting by name avoids positional indices that break if columns move.
profile_cols <- setdiff(names(DFF), c("MainBranch", "Country"))

# Professional developers in the United States (stays a tibble).
DF_USA <- DFF[is_professional & is_usa, profile_cols]

# Professional developers in Latin America & Caribbean (plain data.frame,
# keeping the original row numbers as row names).
DF_LC <- as.data.frame(DFF)[is_professional & is_latam, profile_cols]

# Both groups together, with the country collapsed into a two-level region.
DF_MIX <- as.data.frame(DFF)[
  is_professional & (is_latam | is_usa),
  c("Country", profile_cols)
]
# Compute the mask before overwriting so the recode does not depend on the
# order of the two assignments.
from_usa <- DF_MIX$Country == "United States"
DF_MIX$Country[!from_usa] <- "América Latina"
DF_MIX$Country[from_usa] <- "Estados Unidos"
names(DF_MIX)[names(DF_MIX) == "Country"] <- "Region"
DF_MIX

Tabla Descriptores

Pregunta Variable Tipo de Variable
Which of the following describe you, if any? Please check all that apply. If you prefer not to answer, you may leave this question blank. Género Nominal
What is your age (in years)? If you prefer not to answer, you may leave this question blank. Edad Discreta
Including any education, how many years have you been coding in total? Años codeando Discreta

Análisis de variables

Género

# A respondent may select several genders; the answer is stored as a single
# ";"-separated string. Split every non-missing answer so that each selection
# is counted on its own.
#
# answers: character vector of raw Gender answers (may contain NA).
# Returns a character vector with one element per selected option
# (character(0) when there are no answers).
split_gender_answers <- function(answers) {
  answered <- answers[!is.na(answers)]
  # strsplit() is vectorised, so no element-by-element appending is needed.
  as.character(unlist(strsplit(answered, ";", fixed = TRUE)))
}

gender_vec_USA <- split_gender_answers(DF_USA$Gender)
gender_vec_LC <- split_gender_answers(DF_LC$Gender)

# Figure layout: two pies side by side on the top row and one wide cell
# underneath that hosts the shared legend.
layout(matrix(c(1, 2, 3, 3), ncol = 2, byrow = TRUE), heights = c(6, 1))
par(mai = rep(0.5, 4))

gender_cols <- c("skyblue", "orange", "red")

# Draw one pie whose slice labels are each category's share in percent.
# Labels are derived from the whole frequency table rather than from fixed
# positions, so every slice present gets its own label.
#
# genders: character vector with one element per selected gender option.
# title:   main title of the pie.
draw_gender_pie <- function(genders, title) {
  counts <- table(genders)
  shares <- round(counts / length(genders) * 100, 2)
  pie(counts, main = title, labels = paste0(shares, "%"), col = gender_cols)
}

draw_gender_pie(gender_vec_USA, "Estados Unidos")
draw_gender_pie(gender_vec_LC, "América Latina")

# Third layout cell: an empty plot with no margins that only holds the legend.
# NOTE(review): the legend order assumes the table's categories sort as
# man / other / woman — confirm against the data.
par(mai = c(0, 0, 0, 0))
plot.new()
legend(x = "center", ncol = 3, legend = c("Hombre", "Otros", "Mujer"),
       fill = gender_cols)

Edad

# Age distribution per region. boxplot() ignores missing values on its own,
# so the `na.rm` argument (which it does not define) is not passed.
boxplot(Age ~ Region, data = DF_MIX, xlab = "Region", ylab = "Edad")

# Horizontal reference lines at each region's mean age.
abline(h = mean(DF_USA$Age, na.rm = TRUE), col = "red", lwd = 2)
abline(h = mean(DF_LC$Age, na.rm = TRUE), col = "blue", lwd = 2)
legend(1.2, 100, legend = c("media EEUU", "Media SyC"),
       col = c("red", "blue"), lwd = 2)

Podemos observar que las edades de los programadores profesionales están más concentradas en América Latina comparando los rangos intercuartílicos:

# Interquartile range of age: Latin America & Caribbean, then United States.
IQR(DF_LC[["Age"]], na.rm = TRUE)
## [1] 9
IQR(DF_USA[["Age"]], na.rm = TRUE)
## [1] 12

Además, la edad promedio de los programadores profesionales de América Latina es de 30.14 años, mientras que la de Estados Unidos es de 34.33:

# Mean age rounded to two decimals: Latin America & Caribbean, then
# United States.
round(mean(DF_LC[["Age"]], na.rm = TRUE), digits = 2)
## [1] 30.14
round(mean(DF_USA[["Age"]], na.rm = TRUE), digits = 2)
## [1] 34.33

Junto con las medianas, podemos concluir que, en general, los programadores profesionales de Estados Unidos son mayores que los de América Latina.

# Median age rounded to two decimals: Latin America & Caribbean, then
# United States.
round(median(DF_LC[["Age"]], na.rm = TRUE), digits = 2)
## [1] 28.5
round(median(DF_USA[["Age"]], na.rm = TRUE), digits = 2)
## [1] 32

Años codeando

# Total years coding per region.
# NOTE(review): YearsCode is read as text, so any non-numeric answer becomes
# NA in as.numeric() (with a coercion warning) and is left out of the plot —
# confirm that dropping those answers is intended.
years_mix <- as.numeric(DF_MIX$YearsCode)

# boxplot() ignores missing values on its own, so the `na.rm` argument
# (which it does not define) is not passed.
boxplot(years_mix ~ DF_MIX$Region, xlab = "Region", ylab = "Años Codeando")

# Mean lines drawn with lwd = 2 so they match the legend key and the age plot.
abline(h = mean(as.numeric(DF_USA$YearsCode), na.rm = TRUE),
       col = "red", lwd = 2)
abline(h = mean(as.numeric(DF_LC$YearsCode), na.rm = TRUE),
       col = "blue", lwd = 2)
legend(1.1, 50, legend = c("media EEUU", "Media SyC"),
       col = c("red", "blue"), lwd = 2)

En el diagrama de cajas y bigotes podemos ver cómo los programadores profesionales de Estados Unidos llevan más años programando en total en comparación con los de América Latina.

También es importante destacar la gran diferencia de 6 años que existe entre el tercer cuartil de ambas regiones. Esto implica que el 75% de los programadores profesionales de Estados Unidos cuentan, a lo sumo, con 22 años programando, mientras que en América Latina cuentan con solo 16.

# Quartiles of total years coding: Latin America & Caribbean, then
# United States.
round(quantile(as.numeric(DF_LC[["YearsCode"]]), na.rm = TRUE), digits = 2)
##   0%  25%  50%  75% 100% 
##    1    7   10   16   50
round(quantile(as.numeric(DF_USA[["YearsCode"]]), na.rm = TRUE), digits = 2)
##   0%  25%  50%  75% 100% 
##    1    8   14   22   50

Años codeado profesionalmente

Grado educativo

Tipo de empleo

Satisfacción con empleo

Tamaño de la organización en la que trabaja

Horas trabajadas semanalmente

Frecuencia de horas extra en el trabajo